#include <stdio.h>
#include <stdlib.h>
#include <assert.h>


	/*
	 * This module contains the core execution routines for a SPARC v9
	 * processor.
	 * These augment the generic instructions implemented in the
	 * core of the simulator.
	 *
	 * Moreover this module implements all the generic SPARC v9 operations
	 * from register window manipulations to ASI operations, as well
	 * as managing the processor traps, trap tables and execution state.
	 */


#include "basics.h"
#include "allocate.h"
#include "simcore.h"
#include "config.h"
#include "tsparcv9.h"
#include "tsparcv9internal.h"
#include "sparcv9regs.h"
#include "sparcv9cc.h"
#include "sparcv9decode.h"
#include "xicache.h"
#include "magictraps.h"
#include "breakpoint.h"
#include "rotlog.h"
#include "fatal.h"

/*
 * Assemble a complete FSR value from the three pieces kept on the cpu
 * structure: the control bits, the TEM field (shifted up by
 * V9_FSR_TEM_BIT) and the exception bits.
 * The argument is parenthesised so that any pointer expression may be
 * passed; note that it is evaluated three times.
 */
#define	ss_get_fsr(_sp)	((_sp)->v9_fsr_ctrl | ((_sp)->v9_fsr_tem<<V9_FSR_TEM_BIT) | (_sp)->v9_fsr_exc)


	/*
	 * Predefines here ..
	 */



	/*
	 * Initialisation support functions
	 */

/*
 * Allocate and initialise a sparcv9_cpu_t and the simcpu_t that goes
 * with it.
 *
 *	domainp      - not referenced by this function
 *	config_procp - passed through to sim_cpu_alloc()
 *	nwins        - number of register windows (16 registers are
 *	               allocated per window)
 *	nglobals     - number of global register sets (8 registers each)
 *	maxtl        - maximum trap level; must be < SPARCv9_TLSPACE
 *	ver          - value recorded in v9p->ver
 *	has_fpu      - recorded in v9p->has_fpu; the FPU itself starts
 *	               disabled
 *	proc_debugp  - stored on the new simcpu_t
 *	magicptr     - not referenced by this function
 *
 * Returns the new sparcv9_cpu_t.  It is left in state V9_UnInitialised
 * with no window or global set loaded in the working register file
 * (active_window / active_global == -1).
 */
sparcv9_cpu_t * sparcv9_cpu_alloc(domain_t * domainp,
		config_proc_t * config_procp,
                int nwins, int nglobals, int maxtl, uint64_t ver,
		bool_t has_fpu, proc_debug_t *proc_debugp,
		void * magicptr)
{
	sparcv9_cpu_t * v9p;
	simcpu_t * sp;
	int i;

	v9p = Xcalloc(1, sparcv9_cpu_t);

	v9p->nwins = nwins;

	/*
	 * nwins_mask needs to be a mask of the number of
	 * bits needed to store nwins. We use this mask
	 * when we write a new value of nwins to ensure
	 * that only the bits implemented by nwins in the
	 * chip get set.
	 *
	 * i ends up as the smallest power of two >= nwins.
	 */
	i = 1;
	while (i < nwins)
		i <<= 1;
	v9p->nwins_mask = (i - 1);

	v9p->nglobals = nglobals;

	v9p->globalsp = Xcalloc(8*nglobals, uint64_t);
	v9p->winsp = Xcalloc(16*nwins, uint64_t);

	/* -1: nothing is cached in the simcpu_t working register file yet */
	v9p->active_window = -1;
	v9p->active_global = -1;

	ASSERT(maxtl<SPARCv9_TLSPACE);
	v9p->maxtl = maxtl;	
	v9p->ver = ver;

	v9p->has_fpu = has_fpu;
	v9p->fpu_on = false;			/* make all the FPU info be consistent */
	v9p->pstate.fpu_enabled = false;
	v9p->fprs.fef = false;

	v9p->tl = 0;
	v9p->gl = 0;
	v9p->cwp = 0;

	sp = sim_cpu_alloc(config_procp, (void*)v9p);

	sp->proc_debugp = proc_debugp;

		/*
		 * Initialize stuff the simcpu_t
		 * is likely to use
		 */


		/* setup the call backs for simcpu_t */
SANITY(	sp->xic_miss = NULL; );		/* CPU specific - so force an error if not fixed */
SANITY(	sp->xicachep = NULL; );		/* CPU specific - so force an error if not fixed */


SANITY(	sp->xdc.miss = NULL; );

	sp->decodemep = sparcv9_decode_me;
	sp->decodemedatap = NULL;	/* none yet for us ... */
					/* eventually some v9 specifics */


	sp->v9_ccr = 0;		/* sparc v9 condition codes stored in simcpu_t */


	v9p->simp = sp;
	v9p->state = V9_UnInitialised;	/* Need a trap to get rid of this */

	return v9p;
}







	/*************************************************************
	 *
	 * Performance measurement functions
	 *
	 *************************************************************/


/*
 * Log performance statistics for one cpu.
 *
 *	ptr - the sparcv9_cpu_t to report on (passed as void *)
 *
 * Only does anything when built with PERFORMANCE_CHECK.  It prints:
 *	- instruction counts per processor state, the running state being
 *	  credited with the instructions executed since enter_icount,
 *	- xdcache and xicache hit/miss figures since the previous call
 *	  plus the overall hit rate,
 *	- the instruction count delta since the previous call.
 *
 * Side effects: sp->xic_hits is recomputed as icount - xic_misses, the
 * prev_* counters are advanced to the current values, and fatal() is
 * called once icount reaches a non-zero proc_debugp->exit_at.
 *
 * The percentages are doubles and therefore printed with plain "%f";
 * an "ll" length modifier is not defined for floating conversions.
 */
void sparcv9_perf_dump(void * ptr)
{
#if PERFORMANCE_CHECK	/* { */
	sparcv9_cpu_t * v9p = ptr;
	simcpu_t * sp;
	int cid = v9p->simp->gid;
	double scale;
	simcycle_t diff, icount;
	uint64_t rtotal, utotal, ptotal, htotal;
	uint64_t xic_hits, xic_misses, xdc_hits, xdc_misses;

	sp = v9p->simp;

	/* Instruction counts */
	icount = ICOUNT(sp);
	scale = 100.0 / (double)icount;
	diff = icount - v9p->perf.enter_icount;

	/* only the state we are currently in gets the not-yet-booked delta */
	rtotal = v9p->perf.icount[V9_RED] + ((v9p->state == V9_RED)?diff : 0);
	utotal = v9p->perf.icount[V9_User] + ((v9p->state == V9_User)?diff : 0);
	ptotal = v9p->perf.icount[V9_Priv] + ((v9p->state == V9_Priv)?diff : 0);
	htotal = v9p->perf.icount[V9_HyperPriv] + ((v9p->state == V9_HyperPriv)?diff : 0);

	/* xdcache statistics */
	xdc_hits = sp->xdc_hits - sp->prev_xdc_hits;
	xdc_misses = sp->xdc_misses - sp->prev_xdc_misses;

	/* xicache statistics */
	sp->xic_hits = icount - sp->xic_misses;
	xic_hits = sp->xic_hits - sp->prev_xic_hits;
	xic_misses = sp->xic_misses - sp->prev_xic_misses;

	lprintf(cid, "Instn cnts: R=%lld (%.2f%%), H=%lld (%.2f%%), "\
		"P=%lld (%.2f%%), U=%lld (%.2f%%), Total=%lld\n",
			rtotal, scale * (double)rtotal,
			htotal, scale * (double)htotal,
			ptotal, scale * (double)ptotal,
			utotal, scale * (double)utotal,
			rtotal + utotal + ptotal + htotal);

	ASSERT((rtotal + utotal + ptotal + htotal) == ICOUNT(sp));

        lprintf(cid, "xdcache: hits=%lld (%.2f%%), misses=%lld (%.2f%%), "\
		"avg_hits=(%.2f%%)\n",
		xdc_hits,
		100.0 * xdc_hits / (double)(xdc_hits + xdc_misses),
		xdc_misses,
		100.0 * xdc_misses / (double)(xdc_hits + xdc_misses),
		100.0 * sp->xdc_hits / (double)(sp->xdc_hits + sp->xdc_misses));

        lprintf(cid, "xicache: hits=%lld (%.2f%%), misses=%lld (%.2f%%), "\
		"avg hits=(%.2f%%)\n",
		xic_hits,
		100.0 * xic_hits / (double)(xic_hits + xic_misses),
		xic_misses,
		100.0 * xic_misses / (double)(xic_hits + xic_misses),
		100.0 * sp->xic_hits / (double)(sp->xic_hits + sp->xic_misses));

	/* remember the totals so the next call can report deltas */
        sp->prev_xdc_hits = sp->xdc_hits;
        sp->prev_xdc_misses = sp->xdc_misses;
        sp->prev_xic_hits = sp->xic_hits;
        sp->prev_xic_misses = sp->xic_misses;

	lprintf(-1, "Instn count delta : %lld\n", (uint64_t)icount - sp->prev_icount);
	sp->prev_icount = (uint64_t)icount;

	/*
	 * exit_at is parsed from the conf file.
	 * If it's non-zero, we exit the simulator when we reach or
	 * exceed that number of instns
	 */
	if ((sp->proc_debugp->exit_at != 0) && (icount >= sp->proc_debugp->exit_at)) {
		fatal("Reached the value for exit_at in conf file 0x%llx, "\
		    "current icount=0x%llx\n",
			sp->proc_debugp->exit_at, icount);
	}
#endif	/* PERFORMANCE_CHECK	} */
}




	/*************************************************************
	 *
	 * Execution support functions (and instruction impls)
	 *
	 *************************************************************/













	/*************************************************************
	 *
	 * Debugger interface support functions
	 *
	 *************************************************************/




/*
 * Debugger interface: read one register of a v9 cpu.
 *
 *	v9p    - cpu to inspect
 *	regnum - register selector, interpreted as a sparcv9_reg_t
 *	valp   - receives the value
 *
 * Integer registers are taken from the architectural arrays (globalsp
 * and winsp), not from the simcpu_t working file, so the working copies
 * must already have been written back; this is asserted
 * (active_window == -1 and active_global == -1).
 *
 * Always returns true.  Registers that are not handled here read back
 * as the recognisable value 0x0badcafedeadbeef rather than failing.
 */
bool_t sparcv9_regread(sparcv9_cpu_t * v9p, int regnum, uint64_t * valp)
{
	simcpu_t * sp;
	sparcv9_reg_t regn = regnum;
	int idx;

	sp = v9p->simp;

		/* Assume that everything we care about has
		 * been written back to the architectural
		 * register file.
		 */

	assert( v9p->active_window == -1 );
	assert( v9p->active_global == -1 );

	if (regn>=Reg_sparcv9_g0 && regn<=Reg_sparcv9_g7) {
		/* globals of the current global level */
		assert(v9p->gl < v9p->nglobals);
		idx = 8*v9p->gl + (regn-Reg_sparcv9_g0);
		*valp = v9p->globalsp[idx];
	} else
	if (regn>=Reg_sparcv9_r8 && regn<=Reg_sparcv9_r23) {
		/* outs and locals sit at the start of the window's frame */
		assert(v9p->cwp < v9p->nwins);
		idx = (v9p->nwins-1 - v9p->cwp)*2*V9_REG_GROUP + (regn-Reg_sparcv9_r8);
		*valp = v9p->winsp[idx];
	} else
	if (regn>=Reg_sparcv9_r24 && regn<=Reg_sparcv9_r31) {
		assert(v9p->cwp < v9p->nwins);
		/*
		 * The ins follow the outs and locals of the frame, so they
		 * start 2*V9_REG_GROUP entries above the frame base - the
		 * same layout sparcv9_active_window() copies in and out.
		 * Window 0 is the exception: its ins live at the bottom of
		 * the array.
		 */
		if (v9p->cwp == 0)
			idx = 0;
		else
			idx = (v9p->nwins-1 - v9p->cwp)*2*V9_REG_GROUP + 2*V9_REG_GROUP;
		idx += (regn-Reg_sparcv9_r24);
		*valp = v9p->winsp[idx];
	} else {
		uint64_t	val;
		switch (regn) {
		case Reg_sparcv9_pc:	val = v9p->simp->pc;	break;
		case Reg_sparcv9_npc:	val = v9p->simp->npc;	break;
		case Reg_sparcv9_ccr:	val = sp->v9_ccr;	break;
#if 0 /* { */
		case Reg_sparcv9_fsr:
		case Reg_sparcv9_fprs:
#endif /* } */
		case Reg_sparcv9_y:	val = sp->v9_y;		break;
		case Reg_sparcv9_asi:	val = sp->v9_asi;	break;
#if 0 /* { */
		case Reg_sparcv9_ver:
		case Reg_sparcv9_tick:
#endif /* } */
		case Reg_sparcv9_pil:	val = v9p->pil;		break;
		case Reg_sparcv9_pstate:
			/* rebuild the register image from the unpacked fields */
			val = v9p->pstate.priv ? (1<<V9_PSTATE_PRIV_BIT) : 0;
			val |= v9p->pstate.mm << V9_PSTATE_MM_BITS;
			val |= v9p->pstate.int_enabled ? (1<< V9_PSTATE_IE_BIT) : 0;
			val |= v9p->pstate.fpu_enabled ? (1<< V9_PSTATE_PEF_BIT) : 0;
			val |= v9p->pstate.tle ? (1<< V9_PSTATE_TLE_BIT) : 0;
			val |= v9p->pstate.cle ? (1<< V9_PSTATE_CLE_BIT) : 0;
			val |= v9p->pstate.tct ? (1<< V9_PSTATE_TCT_BIT) : 0;
			break;

		/* trap stack registers are read at the current trap level */
		case Reg_sparcv9_tstate:val = N_TSTATE(v9p, v9p->tl);	break;
		case Reg_sparcv9_tba:	val = v9p->tba;		break;
		case Reg_sparcv9_tl:	val = v9p->tl;		break;
		case Reg_sparcv9_tt:	val = N_TT(v9p, v9p->tl);	break;
		case Reg_sparcv9_tpc:	val = N_TPC(v9p, v9p->tl);	break;
		case Reg_sparcv9_tnpc:	val = N_TNPC(v9p, v9p->tl);	break;
		case Reg_sparcv9_wstate:
			val = v9p->wstate_normal << V9_WSTATE_NORMAL_BITS;
			val |= v9p->wstate_other << V9_WSTATE_OTHER_BITS;
			break;
		case Reg_sparcv9_cwp:		val = v9p->cwp;		break;
		case Reg_sparcv9_cansave:	val = v9p->cansave;	break;
		case Reg_sparcv9_canrestore:	val = v9p->canrestore;	break;
		case Reg_sparcv9_cleanwin:	val = v9p->cleanwin;	break;
		case Reg_sparcv9_otherwin:	val = v9p->otherwin;	break;
#if 0 /* { */
		case Reg_sparcv9_asr16:
		case Reg_sparcv9_asr17:
		case Reg_sparcv9_asr18:
		case Reg_sparcv9_asr19:
		case Reg_sparcv9_asr20:
		case Reg_sparcv9_asr21:
		case Reg_sparcv9_asr22:
		case Reg_sparcv9_asr23:
		case Reg_sparcv9_asr24:
		case Reg_sparcv9_asr25:
		case Reg_sparcv9_asr26:
		case Reg_sparcv9_asr27:
		case Reg_sparcv9_asr28:
		case Reg_sparcv9_asr29:
		case Reg_sparcv9_asr30:
		case Reg_sparcv9_asr31:
		case Reg_sparcv9_icc:
		case Reg_sparcv9_xcc:
		case Reg_sparcv9_fcc0:
		case Reg_sparcv9_fcc1:
		case Reg_sparcv9_fcc2:
		case Reg_sparcv9_fcc3:
#endif /* } */

		default:
#if 0 /* { */
			return false;	/* unknown / unsupported regnum */
#endif /* } */
			/* unsupported register: hand back an obvious junk value */
			val = 0x0badcafedeadbeef;
			break;
		}
		*valp = val;
	}

	return true;	/* ok */
}


	/*
	 * Debugger interface: write one register of a v9 cpu.
	 *
	 *	v9p    - cpu to modify
	 *	regnum - register selector, interpreted as a sparcv9_reg_t
	 *	val    - value to store
	 *
	 * Integer registers are written to the architectural arrays
	 * (globalsp / winsp).  Besides those only pc, npc and cwp can be
	 * written; a cwp value >= nwins saturates to nwins-1.
	 *
	 * Returns true on success, false for a register that is not
	 * handled here.
	 *
	 * NOTE(review): unlike sparcv9_regread() there is no assertion
	 * here that the working register file has been written back.
	 */

bool_t sparcv9_regwrite(sparcv9_cpu_t * v9p, int regnum, uint64_t val)
{
	sparcv9_reg_t regn = regnum;
	int idx;

	if (regn>=Reg_sparcv9_g0 && regn<=Reg_sparcv9_g7) {
		/* globals of the current global level */
		assert(v9p->gl < v9p->nglobals);
		idx = 8*v9p->gl + (regn-Reg_sparcv9_g0);
		v9p->globalsp[idx] = val;
	} else
	if (regn>=Reg_sparcv9_r8 && regn<=Reg_sparcv9_r23) {
		/* outs and locals sit at the start of the window's frame */
		assert(v9p->cwp < v9p->nwins);
		idx = (v9p->nwins-1 - v9p->cwp)*2*V9_REG_GROUP + (regn-Reg_sparcv9_r8);
		v9p->winsp[idx] = val;
	} else
	if (regn>=Reg_sparcv9_r24 && regn<=Reg_sparcv9_r31) {
		assert(v9p->cwp < v9p->nwins);
		/*
		 * The ins follow the outs and locals of the frame, so they
		 * start 2*V9_REG_GROUP entries above the frame base - the
		 * same layout sparcv9_active_window() copies in and out.
		 * Window 0 is the exception: its ins live at the bottom of
		 * the array.
		 */
		if (v9p->cwp == 0)
			idx = 0;
		else
			idx = (v9p->nwins-1 - v9p->cwp)*2*V9_REG_GROUP + 2*V9_REG_GROUP;
		idx += (regn-Reg_sparcv9_r24);
		v9p->winsp[idx] = val;
	} else
	switch (regn) {
	case Reg_sparcv9_pc:
		v9p->simp->pc = val;
		break;
	case Reg_sparcv9_npc:
		v9p->simp->npc = val;
		break;
	case Reg_sparcv9_cwp:
		v9p->cwp = val >= v9p->nwins ? v9p->nwins-1 : val;	/* saturate not wrap !*/
		break;

	default:
		return false;	/* unknown / unsupported regnum */
	}
	return true;	/* OK */
}






/*
 * Breakpoint table used by the sparcv9_*_break* routines in this file.
 * It is created on first use with breakpoint_init() and installed as
 * sp->bp_infop on a cpu whose bp_infop is still NULL.
 */
bp_info_t * globalbpinfop;


/*
 * Insert a breakpoint at bpaddr (in DEFAULT_BP_CONTEXT) into the global
 * breakpoint table, creating the table if it does not exist yet, and
 * make sure the cpu behind v9p points at that table.
 */
void sparcv9_set_break(sparcv9_cpu_t * v9p, tvaddr_t bpaddr)
{
	simcpu_t *sp;

	if (!globalbpinfop) {
		globalbpinfop = breakpoint_init();
	}

	sp = v9p->simp;
	breakpoint_insert(globalbpinfop, bpaddr, DEFAULT_BP_CONTEXT);

	/* an existing per-cpu table pointer is left untouched */
	if (!sp->bp_infop) {
		sp->bp_infop = globalbpinfop;
	}
}

/*
 * Call breakpoint_insert_next() on the global breakpoint table,
 * creating the table if it does not exist yet, and make sure the cpu
 * behind v9p points at that table.
 */
void sparcv9_set_break_next(sparcv9_cpu_t *v9p)
{
	simcpu_t *sp;

	if (!globalbpinfop) {
		globalbpinfop = breakpoint_init();
	}

	sp = v9p->simp;
	breakpoint_insert_next(globalbpinfop);

	/* an existing per-cpu table pointer is left untouched */
	if (!sp->bp_infop) {
		sp->bp_infop = globalbpinfop;
	}
}

/*
 * Call breakpoint_clear_next() on the global breakpoint table.  As in
 * the set routines, the table is created if it does not exist yet and
 * is installed on the cpu behind v9p when that cpu has none.
 */
void sparcv9_clear_break_next(sparcv9_cpu_t *v9p)
{
	simcpu_t *sp;

	if (!globalbpinfop) {
		globalbpinfop = breakpoint_init();
	}

	sp = v9p->simp;
	breakpoint_clear_next(globalbpinfop);

	if (!sp->bp_infop) {
		sp->bp_infop = globalbpinfop;
	}
}

/*
 * Report whether a breakpoint exists at bpaddr (in DEFAULT_BP_CONTEXT)
 * for the cpu behind v9p.  A cpu without a breakpoint table never hits.
 */
bool_t sparcv9_hit_break(sparcv9_cpu_t *v9p, tvaddr_t bpaddr)
{
	simcpu_t *sp = v9p->simp;

	if (sp->bp_infop != (bp_info_t*)0) {
		return breakpoint_find_by_addr(sp->bp_infop, bpaddr, DEFAULT_BP_CONTEXT) != NULL;
	}

	return false;
}

/*
 * Delete the breakpoint at bpaddr (in DEFAULT_BP_CONTEXT) from the
 * table of the cpu behind v9p.  Nothing happens when the cpu has no
 * table; once the table's active_count reaches zero the cpu's table
 * pointer is cleared again.
 */
void sparcv9_clear_break(sparcv9_cpu_t * v9p, tvaddr_t bpaddr)
{
	simcpu_t *sp = v9p->simp;

	if (sp->bp_infop != (bp_info_t*)0) {
		breakpoint_delete_by_addr(sp->bp_infop, bpaddr, DEFAULT_BP_CONTEXT);

		if (sp->bp_infop->active_count == 0) {
			sp->bp_infop = (bp_info_t*)0;
		}
	}
}













		/*------------------------------------------------------------*
		 *
		 * SPARC register windows are not nice.
		 *
		 * So here's how they are implemented in Legion;
		 *
		 * For each window shuffle - we copy the old window frame out
		 * and copy the new one in to the working register file from
		 * the architectural (sparc) one. We could optimise the
		 * common case of incrementing or decrementing the current
		 * window pointer (cwp), but this version deals with all cases.
		 *
		 * The v9p->active_window tells us which frame is supposed to be
		 * in the simcpu_t register file, and new_window the window we want.
		 *
		 * If new_window == -1 we just want to write back into the architectural
		 * file. If v9p->active_window == -1 we need to retrieve from the
		 * architectural file.
		 *
		 * OK the mess is worse:
		 *	regs	8-15 = outs
		 *		16-23 = locals
		 *		24-31 = ins.
		 *
		 * If cwp is incremented, the old outs become the new ins
		 * (yes I know it's backwards).
		 * Anyway so that the rotation works correctly ( and without
		 * decoding %g0 as sim register 31), we work our way down the
		 * architectural register array as we increment cwp.
		 * So what was register offset X becomes register offset X+16 as we
		 * increment cwp.
		 *
		 * The only special case is (by our choice) window 0, which
		 * sits at (nwins-1)*16 as a base, but the ins (regs 24-31)
		 * are placed at the bottom of the architectural array starting at 0
		 *
		 *------------------------------------------------------------*/

/*
 * Make new_window the register window held in the simcpu_t working
 * register file.
 *
 *	sp         - cpu whose working registers are swapped
 *	new_window - window to load, or -1 to only write back
 *
 * If a window is currently cached (v9p->active_window != -1) its outs,
 * locals and ins are first copied back to the architectural array.
 * Window 0 is split: outs and locals go to its frame, the ins go to
 * the start of the array.
 */
void sparcv9_active_window(simcpu_t * sp, int new_window)
{
	sparcv9_cpu_t * v9p = (sparcv9_cpu_t *)(sp->specificp);
	uint64_t *archp, *workp;
	int n;

	if (v9p->active_window != -1) {
			/* OK stash back the old window's contents */
		archp = &(v9p->winsp[(v9p->nwins -1 - v9p->active_window)*2*V9_REG_GROUP]);
		workp = &sp->intreg[V9_OUT_OFFSET];

		if (v9p->active_window != 0) {
			/* outs, locals and ins are contiguous */
			for (n = 0; n < V9_REG_GROUP * 3; n++) {
				archp[n] = workp[n];
			}
		} else {
			/* outs and locals ... */
			for (n = 0; n < V9_REG_GROUP * 2; n++) {
				archp[n] = workp[n];
			}

			/* ... then the ins, kept at the bottom of the array */
			archp = &v9p->winsp[0];
			workp = &sp->intreg[V9_IN_OFFSET];
			for (n = 0; n < V9_REG_GROUP; n++) {
				archp[n] = workp[n];
			}
		}

		v9p->active_window = -1;	/* tag our cached window state as invalid */
	}

	if (new_window == -1) {
		return;		/* write back only */
	}

	archp = &(v9p->winsp[(v9p->nwins -1 - new_window)*2*V9_REG_GROUP]);
	workp = &sp->intreg[V9_OUT_OFFSET];

	if (new_window != 0) {
		for (n = 0; n < V9_REG_GROUP * 3; n++) {
			workp[n] = archp[n];
		}
	} else {
		for (n = 0; n < V9_REG_GROUP * 2; n++) {
			workp[n] = archp[n];
		}

		archp = &v9p->winsp[0];
		workp = &sp->intreg[V9_IN_OFFSET];
		for (n = 0; n < V9_REG_GROUP; n++) {
			workp[n] = archp[n];
		}
	}

	v9p->active_window = new_window;
}





	/*
	 * Make new_global the global register set held in the simcpu_t
	 * working register file.  Same scheme as for the windows, but
	 * simpler because the global sets do not overlap.
	 *
	 *	sp         - cpu whose working globals are swapped
	 *	new_global - set to load, or -1 to only write back
	 */

void sparcv9_active_globals(simcpu_t * sp, int new_global)
{
	sparcv9_cpu_t * v9p = (sparcv9_cpu_t *)(sp->specificp);
	uint64_t *archp, *workp;
	int n;

	if (v9p->active_global != -1) {
			/* OK stash back the old globals' contents */
		archp = &(v9p->globalsp[(v9p->active_global)*V9_GLOBAL_GROUP]);
		workp = &sp->intreg[V9_GLOBAL_OFFSET];

		for (n = 0; n < V9_GLOBAL_GROUP; n++) {
			archp[n] = workp[n];
		}

		v9p->active_global = -1;	/* tag our cached global state as invalid */
	}

	if (new_global == -1) {
		return;		/* write back only */
	}

	archp = &(v9p->globalsp[new_global*V9_GLOBAL_GROUP]);
	workp = &sp->intreg[V9_GLOBAL_OFFSET];

	for (n = 0; n < V9_GLOBAL_GROUP; n++) {
		workp[n] = archp[n];
	}

	v9p->active_global = new_global;
}






/*
 * Implementation of the save instruction.
 *
 *	sp        - executing cpu
 *	rdest_num - destination register number
 *	newval    - value written to the destination in the new window
 *
 * Posts a spill trap when cansave is zero, or a clean_window trap when
 * cleanwin equals canrestore; in both cases nothing else changes.
 * Otherwise cwp and canrestore are incremented and cansave decremented
 * (all modulo nwins), the new window is made active, the destination
 * is written and execution moves to the next instruction.
 */
void sparcv9_save_instr(simcpu_t * sp, int rdest_num, tvaddr_t newval)
{
	sparcv9_cpu_t * v9p = (sparcv9_cpu_t *)(sp->specificp);

#if HYPERPRIVILEGED_USE_WARN /* { */
	if (V9_RED == v9p->state || V9_HyperPriv == v9p->state)
		EXEC_WARNING(("save instruction in hyperprivileged mode (%%pc=0x%llx)", sp->pc));
#endif /* } HYPERPRIVILEGED_USE_WARN */

		/* No window left to save into: spill trap */
	if (v9p->cansave == 0) {
		sparcv9_trap_type_t tt;

		/* the trap vector is picked by otherwin and the wstate field */
		tt = (v9p->otherwin != 0)
		    ? (Sparcv9_trap_spill_0_other | (v9p->wstate_other<<2))
		    : (Sparcv9_trap_spill_0_normal | (v9p->wstate_normal<<2));
		v9p->post_precise_trap(sp, tt);
		return;
	}

		/* Next window is not clean: clean window trap */
	if (v9p->cleanwin == v9p->canrestore) {
		v9p->post_precise_trap(sp, Sparcv9_trap_clean_window);
		return;
	}

		/* Rotate to the next window */
	v9p->cwp = INC_MOD(v9p->cwp, v9p->nwins);
	v9p->cansave = DEC_MOD(v9p->cansave, v9p->nwins);
	v9p->canrestore = INC_MOD(v9p->canrestore, v9p->nwins);
	sparcv9_active_window(sp, v9p->cwp);

		/* the destination is written in the NEW window */
	if (!Zero_Reg(rdest_num)) {
		sp->intreg[rdest_num] = newval;
	}

	NEXT_INSTN(sp);
}



/*
 * Implementation of the restore instruction.
 *
 *	sp        - executing cpu
 *	rdest_num - destination register number
 *	newval    - value written to the destination in the new window
 *
 * Posts a fill trap when canrestore is zero and changes nothing else.
 * Otherwise cwp and canrestore are decremented and cansave incremented
 * (all modulo nwins), the new window is made active, the destination
 * is written and execution moves to the next instruction.
 */
void sparcv9_restore_instr(simcpu_t * sp, int rdest_num, tvaddr_t newval)
{
	sparcv9_cpu_t * v9p = (sparcv9_cpu_t *)(sp->specificp);

#if HYPERPRIVILEGED_USE_WARN /* { */
	if (V9_RED == v9p->state || V9_HyperPriv == v9p->state)
		EXEC_WARNING(("restore instruction in hyperprivileged mode (%%pc=0x%llx)", sp->pc));
#endif /* } HYPERPRIVILEGED_USE_WARN */

		/* Nothing to restore from: fill trap */
	if (v9p->canrestore == 0) {
		sparcv9_trap_type_t tt;

		/* the trap vector is picked by otherwin and the wstate field */
		tt = (v9p->otherwin != 0)
		    ? (Sparcv9_trap_fill_0_other | (v9p->wstate_other<<2))
		    : (Sparcv9_trap_fill_0_normal | (v9p->wstate_normal<<2));
		v9p->post_precise_trap(sp, tt);
		return;
	}

		/* Rotate back to the previous window */
	v9p->cwp = DEC_MOD( v9p->cwp, v9p->nwins );
	v9p->cansave = INC_MOD(v9p->cansave, v9p->nwins);
	v9p->canrestore = DEC_MOD(v9p->canrestore, v9p->nwins);
	sparcv9_active_window(sp, v9p->cwp);

		/* the destination is written in the NEW window */
	if (!Zero_Reg(rdest_num)) {
		sp->intreg[rdest_num] = newval;
	}

	NEXT_INSTN(sp);
}




/*
 * Implementation of the return instruction.
 *
 *	sp       - executing cpu
 *	targetpc - address to continue at; the low two bits are cleared
 *
 * Posts a fill trap when canrestore is zero and changes nothing else.
 * Otherwise the window is rotated back exactly as for restore and the
 * pc is redirected with SET_PC_WITH_DS().
 */
void sparcv9_return_instr(simcpu_t * sp, tvaddr_t targetpc)
{
	sparcv9_cpu_t * v9p = (sparcv9_cpu_t *)(sp->specificp);

#if HYPERPRIVILEGED_USE_WARN /* { */
	if (V9_RED == v9p->state || V9_HyperPriv == v9p->state)
		EXEC_WARNING(("return instruction in hyperprivileged mode (%%pc=0x%llx)", sp->pc));
#endif /* } HYPERPRIVILEGED_USE_WARN */

		/* Nothing to restore from: fill trap */
	if (v9p->canrestore == 0) {
		sparcv9_trap_type_t tt;

		/* the trap vector is picked by otherwin and the wstate field */
		tt = (v9p->otherwin != 0)
		    ? (Sparcv9_trap_fill_0_other | (v9p->wstate_other<<2))
		    : (Sparcv9_trap_fill_0_normal | (v9p->wstate_normal<<2));
		v9p->post_precise_trap(sp, tt);
		return;
	}

		/* Rotate back to the previous window */
	v9p->cwp = DEC_MOD( v9p->cwp, v9p->nwins );
	v9p->cansave = INC_MOD(v9p->cansave, v9p->nwins);
	v9p->canrestore = DEC_MOD(v9p->canrestore, v9p->nwins);
	sparcv9_active_window(sp, v9p->cwp);

		/* force word alignment before redirecting the pc */
	targetpc &= ~0x3;
	SET_PC_WITH_DS(sp, targetpc);
}





        /*
         * Other misc instruction implementations
         */

/*
 * 64 bit unsigned divide: writes a/b to the destination register
 * (unless Zero_Reg() says it is the zero register) and moves to the
 * next instruction.  A zero divisor posts a division_by_zero trap
 * instead and leaves everything else untouched.
 */
void sparcv9_udiv64(simcpu_t * sp, int rdest_num, uint64_t a, uint64_t b)
{
	if (b != (uint64_t)0) {
		if (!Zero_Reg(rdest_num)) {
			sp->intreg[rdest_num] = a/b;
		}

		NEXT_INSTN(sp);
	} else {
		sparcv9_cpu_t * v9p = (sparcv9_cpu_t *)(sp->specificp);

		v9p->post_precise_trap(sp, Sparcv9_trap_division_by_zero);
	}
}




/*
 * Conditional software trap.
 *
 *	sp   - executing cpu
 *	tnum - software trap number; masked to 7 bits in user state and
 *	       to 8 bits in every other state
 *	cc   - selects the condition code nibble of sp->v9_ccr: zero uses
 *	       the low four bits, non-zero the high four bits
 *	cond - condition to test, used as index into sparcv9_cc_magic[]
 *
 * If the condition holds, a magic trap (both SS_MAGIC_TRAP_CC(cc) and
 * SS_MAGIC_TRAP(sp, tnum) true) simply completes the instruction;
 * otherwise trap tnum + Sparcv9_trap_trap_instruction is posted.
 * If the condition does not hold execution moves to the next
 * instruction.
 */
void sparcv9_trapcc(simcpu_t * sp, uint64_t tnum, int cc, cond_type_t cond)
{
	int ccr;
	sparcv9_cpu_t * v9p = (sparcv9_cpu_t *)(sp->specificp);

	if (V9_User==v9p->state)
		tnum &= 0x7f;
	else
		tnum &= 0xff;

	ccr = sp->v9_ccr;
	if (cc) ccr>>=4;

	/* bit N of the table entry says whether cond holds for cc value N */
	if ( (sparcv9_cc_magic[cond] >> (ccr & 0xf)) &1 ) {
		if (SS_MAGIC_TRAP_CC(cc) && SS_MAGIC_TRAP(sp, tnum)) {
			NEXT_INSTN(sp);
			return;
		}

		v9p->post_precise_trap(sp, tnum + Sparcv9_trap_trap_instruction);
		return;
	}

	NEXT_INSTN(sp);
}







	/*
	 * Called by an instruction implementation that has to raise an
	 * IEEE 754 floating point exception: sets the FTT field of the
	 * FSR control bits, trims the pending exception bits and posts
	 * the fp_exception_ieee_754 trap.
	 */

void sparcv9_deliver_ieee_exception(simcpu_t * sp)
{
	sparcv9_cpu_t * v9p = (sparcv9_cpu_t *)(sp->specificp);
	uint64_t enabled;

	sp->v9_fsr_ctrl &= ~V9_FSR_FTT_MASK;
	sp->v9_fsr_ctrl |= SPARCv9_FTT_IEEE_754_exception << V9_FSR_FTT_SHIFT;

	/* CEXC bits are modified by TEM bits when a trap is taken */
	enabled = sp->v9_fsr_exc & sp->v9_fsr_tem &
	    (V9_FSR_OF_BIT|V9_FSR_UF_BIT|V9_FSR_NX_BIT);
	if (enabled != 0) {
		uint64_t keep;

		/* overflow beats underflow; otherwise keep what is there */
		if (enabled & V9_FSR_OF_BIT) {
			keep = V9_FSR_OF_BIT;
		} else if (enabled & V9_FSR_UF_BIT) {
			keep = V9_FSR_UF_BIT;
		} else {
			keep = enabled;
		}

		sp->v9_fsr_exc &= ~(V9_FSR_OF_BIT|V9_FSR_UF_BIT|V9_FSR_NX_BIT);
		sp->v9_fsr_exc |= keep;
	}

DBGFSR( lprintf(sp->gid, "sparcv9_deliver_ieee_exception: pc=0x%llx, fsr=0x%llx\n", sp->pc, ss_get_fsr(sp)); );

	v9p->post_precise_trap(sp, Sparcv9_trap_fp_exception_ieee_754);
}

#ifndef FP_DECODE_DISABLED
	/*
	 * Called by an instruction implementation that has to raise a
	 * floating point disabled exception: posts the fp_disabled trap
	 * through the cpu's post_precise_trap hook.
	 */

void sparcv9_deliver_fp_disabled_exception(simcpu_t * sp)
{
	((sparcv9_cpu_t *)(sp->specificp))->post_precise_trap(sp, Sparcv9_trap_fp_disabled);
}
#endif /* FP_DECODE_DISABLED */


/*
 * Byte swap helpers.  BSWAP_n(x) reverses the low n bits (n/8 bytes)
 * of x and yields them as a uint64_t; any higher bits of x are dropped.
 * The argument is widened to uint64_t before shifting, so narrower or
 * signed arguments cannot overflow or be shifted by their full width.
 * Note that x is evaluated several times.
 */
#define BSWAP_8(x)      ((uint64_t)(x) & 0xff)
#define BSWAP_16(x)     ((BSWAP_8(x) << 8) | BSWAP_8((uint64_t)(x) >> 8))
#define BSWAP_32(x)     ((BSWAP_16(x) << 16) | BSWAP_16((uint64_t)(x) >> 16))
#define BSWAP_64(x)     ((BSWAP_32(x) << 32) | BSWAP_32((uint64_t)(x) >> 32))

/*
 * Return the value at *regp with its low `count` bytes reversed.
 *
 *	regp  - value to convert; it is only read, never modified
 *	count - operand size in bytes: 1, 2, 4 or 8
 *
 * Bytes above `count` do not appear in the result.  Any other count is
 * reported through fatal(); should fatal() ever return, 0 is returned
 * rather than an indeterminate value.
 */
uint64_t sparcv9_invert_endianess(uint64_t *regp, uint32_t count)
{
	uint64_t new_reg = 0;

	switch (count) {
	case 1:
		new_reg = (uint64_t)BSWAP_8(*regp);
		break;
	case 2:
		new_reg = (uint64_t)BSWAP_16(*regp);
		break;
	case 4:
		new_reg = (uint64_t)BSWAP_32(*regp);
		break;
	case 8:
		new_reg = (uint64_t)BSWAP_64(*regp);
		break;
	default:
		fatal("sparcv9_invert_endianess() count of %u - not supported", count);
	}

	return (new_reg);
}


	/*************************************************************
	 *
	 * Functions and variables to assist with debugging
	 * Legion's SPARC v9 support.
	 *
	 *************************************************************/



/*
 * Printable names of the v9 processor states.  The dump and trace
 * routines in this file index the table directly with v9p->state.
 * NOTE(review): the order presumably has to mirror the state
 * enumeration declared elsewhere - confirm before adding a state.
 */
char * sparcv9_state_name[] = {
        "V9_UnInitialised",       /* Not a legit state - tells us allocated but not inited */
        "V9_User",
	"V9_Priv",
	"V9_HyperPriv",
	"V9_RED",
	"V9_Error"
};

/*
 * Instruction cache flush by virtual address - deliberately a no-op.
 *
 * The current xicache implementation is completely coherent with
 * memory, so a flush instruction has nothing to do.  Should the
 * xicache ever become non coherent, this routine will probably need
 * to call xicache_flush_line() and propagate the flush to the other
 * strands.
 *
 * NOTE: asi == -1 for implicit ASI
 */
void ss_iflush_by_va(simcpu_t * sp, tvaddr_t va, int asi, int gran)
{
	/* nothing to do; the arguments are intentionally unused */
	(void)sp;
	(void)va;
	(void)asi;
	(void)gran;
}

/*
 * Instruction cache flush by physical address - deliberately a no-op.
 *
 * The current xicache implementation is completely coherent with
 * memory, so a flush instruction has nothing to do.  Should the
 * xicache ever become non coherent, this routine will probably need
 * to call xicache_flush_line() and propagate the flush to the other
 * strands.
 */
void ss_iflush_by_pa(simcpu_t * sp, uint64_t pa, int gran)
{
	/* nothing to do; the arguments are intentionally unused */
	(void)sp;
	(void)pa;
	(void)gran;
}

#if !defined(NDEBUG)	/* { */

/*
 * Log the 32 working integer registers of sp, one line per register
 * index 0-7 showing the global, out, local and in register with that
 * index (intreg[n], [n+8], [n+16] and [n+24]).
 */
void sparcv9_dump_intregs(simcpu_t *sp)
{
	int n = 0;

	while (n < 8) {
		lprintf(sp->gid, "g%d=0x%016llx  o%d=0x%016llx  l%d=0x%016llx  i%d=0x%016llx\n",
			n, sp->intreg[n],
			n, sp->intreg[n+8],
			n, sp->intreg[n+16],
			n, sp->intreg[n+24] );
		n++;
	}
}


/*
 * dump out:
 *	- cycle, processor state, PC, NPC and TL
 *	- Trap Stack (every level from 1 up to maxtl)
 *	- global, out, local and in registers
 *	- %asi and the register window control registers
 *	- the global set one level below the current gl (only when gl > 0)
 *
 * Everything is written with lprintf() under the cpu's gid.
 */
void sparcv9_dump_state(simcpu_t *sp)
{
	int i;
	uint64_t *gp;
	sparcv9_cpu_t *v9p = (sparcv9_cpu_t *)(sp->specificp);
	int id;

	id = sp->gid;

		/* dump current v9 CPU state */
	lprintf(id, "cpu %d : cycle=0x%llx state=%s : PC=0x%llx NPC=0x%llx TL=%d\n",
	    id, sp->cycle, sparcv9_state_name[v9p->state], sp->pc,
	    sp->npc, v9p->tl);

	for (i=1; i<=v9p->maxtl; i++) {
		lprintf(id, "tstack: [%d]\tTSTATE=0x%llx\tTT=0x%llx\tTPC=0x%llx\tTNPC=0x%llx\tHTSTATE=0x%llx\n",
			i, N_TSTATE(v9p, i), N_TT(v9p, i), N_TPC(v9p, i), N_TNPC(v9p, i), N_HTSTATE(v9p, i) );
	}

	sparcv9_dump_intregs(sp);

	/*
	 * The trace routine prints v9_asi with %x while this one uses
	 * %llx; the cast makes the argument match the format whatever
	 * the declared width of the field is.
	 */
	lprintf(id, "%%asi: 0x%llx : cwp=0x%x\n", (unsigned long long)sp->v9_asi, v9p->cwp);
	lprintf(id, "cansave=0x%x : canrestore=0x%x : otherwin=0x%x : cleanwin=0x%x : wstate other=0x%x, normal=0x%x\n",
		v9p->cansave, v9p->canrestore, v9p->otherwin, v9p->cleanwin,
		v9p->wstate_other, v9p->wstate_normal);
	if (v9p->gl > 0) {
		lprintf(id, "globals[%d (gl-1)]:\n", v9p->gl - 1);
		gp = &v9p->globalsp[(v9p->gl - 1) * V9_GLOBAL_GROUP];
		/* starts at 1: entry 0 of the set is not printed */
		for (i = 1; i < V9_GLOBAL_GROUP; i++) {
			lprintf(id, " %%g%d = 0x%016llx\n", i, gp[i]);
		}
	}
}


/*
 * trace output:
 *	- current instruction (passed in)
 *	- state
 *	- Trap Stack
 *	- global, out, local and in registers
 *	- global level registers
 *
 * The register, trap stack and global dumps are wrapped in DBGEL(),
 * the one line instruction summary in DBGELMIN().
 *
 * This function holds the log lock to ensure that all the output happens
 * atomically.
 *
 */
void sparcv9_trace_output(simcpu_t *sp, uint32_t instn)
{
	int i;
	uint64_t *gp;
	int id;
	sparcv9_cpu_t *v9p;
	char ibuf[160];

	v9p = (sparcv9_cpu_t *)sp->specificp;

	id = sp->gid;

	log_lock();

DBGEL(
	log_printf(-1, "\n");
	log_printf(sp->gid, "[0x%llx:%s]\n",
		ICOUNT(sp), sparcv9_state_name[v9p->state] );

	for (i = 0; i < 8; i++) {
		log_printf(sp->gid, "g%d=0x%016llx  o%d=0x%016llx  l%d=0x%016llx  i%d=0x%016llx\n",
			i, sp->intreg[i],
			i, sp->intreg[i+8],
			i, sp->intreg[i+16],
			i, sp->intreg[i+24] );
	}

	if (v9p->tl > 0) {
		for (i=1; i<=v9p->tl; i++) {
			log_printf(id, "tstack[%d]:\thtstate=0x%llx\ttstate=0x%llx\ttt=0x%llx\ttpc=0x%llx\ttnpc=0x%llx\n",
				i, N_HTSTATE(v9p, i), N_TSTATE(v9p, i), N_TT(v9p, i), N_TPC(v9p, i), N_TNPC(v9p, i) );
		}
	}
	if (v9p->gl > 0) {
		for (i=0; i<v9p->gl; i++) {
			gp = &v9p->globalsp[i * V9_GLOBAL_GROUP];
			log_printf(id, "global[%d]:\t%%g1=0x%llx %%g2=0x%llx %%g3=0x%llx %%g4=0x%llx %%g5=0x%llx %%g6=0x%llx %%g7=0x%llx\n",
			i, gp[1], gp[2], gp[3], gp[4], gp[5], gp[6], gp[7]
				);
		}
	}
);

	/* size taken from the buffer itself so the two cannot drift apart */
	sparcv9_idis(ibuf, (int)sizeof (ibuf), instn, sp->pc);
	/*
	 * v9_asi is printed with %x here but with %llx by the state dump;
	 * the cast makes the argument match the format whatever the
	 * declared width of the field is.
	 */
DBGELMIN(
	log_printf(sp->gid, "[0x%llx:%.6s] pc=0x%llx npc=0x%llx tl=%d gl=%d asi=0x%x instn=%08x: %s\n",
		ICOUNT(sp), sparcv9_state_name[v9p->state],
		sp->pc, sp->npc, v9p->tl, v9p->gl, (unsigned int)sp->v9_asi, instn, ibuf);
);

	log_unlock();
}


#endif			/* } */

#ifdef	OPENSPARC /* { */
/*
 * Dummy function to disassemble an instn: no real decoding is done,
 * the raw instruction word is just formatted as a ".word" directive
 * into bufp (at most size bytes).  The address is not used.
 */
void sparcv9_idis(char * bufp, int size, uint32_t instn, tvaddr_t address)
{
	(void)address;

	(void)snprintf(bufp, size, ".word\t0x%08x", instn);
}
#endif /* } */

